[llvm] cf0aa0b - [NFC][PowerPC] Added test to check register allocation for ACC registers
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 03:22:31 PDT 2021
Author: Stefan Pintilie
Date: 2021-07-14T05:22:24-05:00
New Revision: cf0aa0b66ccec87b0ac14f632998074e507be950
URL: https://github.com/llvm/llvm-project/commit/cf0aa0b66ccec87b0ac14f632998074e507be950
DIFF: https://github.com/llvm/llvm-project/commit/cf0aa0b66ccec87b0ac14f632998074e507be950.diff
LOG: [NFC][PowerPC] Added test to check register allocation for ACC registers
ACC registers are a combination of 4 consecutive vector registers and therefore
sometimes require special treatment for register allocation. This patch only
adds a test.
Added:
llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
new file mode 100644
index 0000000000000..ace652d503ae9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
@@ -0,0 +1,352 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
+; RUN: | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE
+
+%0 = type <{ double }> ; packed struct holding one double (8 bytes); element type of %arg1's array
+%1 = type <{ double }> ; same layout as %0; element type of %arg2's array
+
+define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unnamed_addr {
+; CHECK-LABEL: acc_regalloc:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: lwz r3, 0(r3)
+; CHECK-NEXT: lxv vs0, 0(0)
+; CHECK-NEXT: xxlxor vs2, vs2, vs2
+; CHECK-NEXT: xxlxor vs3, vs3, vs3
+; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
+; CHECK-NEXT: xxlxor v2, v2, v2
+; CHECK-NEXT: li r6, 1
+; CHECK-NEXT: li r4, 16
+; CHECK-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
+; CHECK-NEXT: extswsli r3, r3, 3
+; CHECK-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: xvmaddadp vs3, vs0, vs3
+; CHECK-NEXT: lxvdsx vs1, 0, r3
+; CHECK-NEXT: xvmaddadp vs2, vs1, vs2
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb9
+; CHECK-NEXT: #
+; CHECK-NEXT: addi r6, r6, 2
+; CHECK-NEXT: lxv vs5, -64(r5)
+; CHECK-NEXT: lxv vs6, -16(r5)
+; CHECK-NEXT: lxv vs4, 16(0)
+; CHECK-NEXT: xxlor v7, vs2, vs2
+; CHECK-NEXT: xxlxor v8, v8, v8
+; CHECK-NEXT: xxlxor v1, v1, v1
+; CHECK-NEXT: mulld r6, r6, r3
+; CHECK-NEXT: xvmaddadp v7, vs5, v2
+; CHECK-NEXT: xxlxor v6, v6, v6
+; CHECK-NEXT: xvmaddadp v8, vs6, v8
+; CHECK-NEXT: xvmaddadp v1, vs4, vs1
+; CHECK-NEXT: xvmuldp v0, vs4, v2
+; CHECK-NEXT: xvmaddadp v1, v2, v2
+; CHECK-NEXT: xvmaddadp v0, v2, v2
+; CHECK-NEXT: lxvdsx v4, r6, r4
+; CHECK-NEXT: xvmaddadp v6, vs5, v6
+; CHECK-NEXT: li r6, 0
+; CHECK-NEXT: xvmuldp v9, vs6, v4
+; CHECK-NEXT: xvmuldp v3, vs5, v4
+; CHECK-NEXT: xvmuldp v11, vs0, v4
+; CHECK-NEXT: vmr v10, v2
+; CHECK-NEXT: xvmuldp v5, v4, v2
+; CHECK-NEXT: vmr v4, v2
+; CHECK-NEXT: xxlor vs18, v8, v8
+; CHECK-NEXT: xxlor vs4, v2, v2
+; CHECK-NEXT: xxlor vs12, v10, v10
+; CHECK-NEXT: xxlor vs13, v11, v11
+; CHECK-NEXT: xxlor v10, vs3, vs3
+; CHECK-NEXT: xxlor vs8, v4, v4
+; CHECK-NEXT: xxlor vs9, v5, v5
+; CHECK-NEXT: xxlor vs10, v0, v0
+; CHECK-NEXT: xxlor vs11, v1, v1
+; CHECK-NEXT: xxmtacc acc2
+; CHECK-NEXT: xxlor vs19, v9, v9
+; CHECK-NEXT: vmr v8, v2
+; CHECK-NEXT: xxlor vs5, v3, v3
+; CHECK-NEXT: xxlor vs6, v6, v6
+; CHECK-NEXT: xxlor vs7, v7, v7
+; CHECK-NEXT: xxlor vs14, v10, v10
+; CHECK-NEXT: xxlor vs15, v11, v11
+; CHECK-NEXT: xxlor vs16, v8, v8
+; CHECK-NEXT: xxlor vs17, v9, v9
+; CHECK-NEXT: xxmtacc acc1
+; CHECK-NEXT: xxmtacc acc3
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xxmtacc acc4
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
+; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT: xxmfacc acc1
+; CHECK-NEXT: xxmfacc acc2
+; CHECK-NEXT: xxmfacc acc3
+; CHECK-NEXT: xxmfacc acc4
+; CHECK-NEXT: stxv vs5, 0(r3)
+; CHECK-NEXT: stxv vs13, 32(r3)
+; CHECK-NEXT: stxv vs8, 16(0)
+; CHECK-NEXT: stxv vs16, 48(0)
+; CHECK-NEXT: b .LBB0_1
+;
+; TRACKLIVE-LABEL: acc_regalloc:
+; TRACKLIVE: # %bb.0: # %bb
+; TRACKLIVE-NEXT: lwz r3, 0(r3)
+; TRACKLIVE-NEXT: lxv vs0, 0(0)
+; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2
+; TRACKLIVE-NEXT: xxlxor vs3, vs3, vs3
+; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: xxlxor v2, v2, v2
+; TRACKLIVE-NEXT: li r6, 1
+; TRACKLIVE-NEXT: li r4, 16
+; TRACKLIVE-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: extswsli r3, r3, 3
+; TRACKLIVE-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: xvmaddadp vs3, vs0, vs3
+; TRACKLIVE-NEXT: lxvdsx vs1, 0, r3
+; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2
+; TRACKLIVE-NEXT: .p2align 4
+; TRACKLIVE-NEXT: .LBB0_1: # %bb9
+; TRACKLIVE-NEXT: #
+; TRACKLIVE-NEXT: addi r6, r6, 2
+; TRACKLIVE-NEXT: lxv vs4, 16(0)
+; TRACKLIVE-NEXT: xxlxor v1, v1, v1
+; TRACKLIVE-NEXT: lxv vs6, -16(r5)
+; TRACKLIVE-NEXT: lxv vs5, -64(r5)
+; TRACKLIVE-NEXT: xxlxor v8, v8, v8
+; TRACKLIVE-NEXT: xxlor v7, vs2, vs2
+; TRACKLIVE-NEXT: xxlxor v6, v6, v6
+; TRACKLIVE-NEXT: mulld r6, r6, r3
+; TRACKLIVE-NEXT: vmr v10, v2
+; TRACKLIVE-NEXT: xxlor vs8, v10, v10
+; TRACKLIVE-NEXT: xvmaddadp v1, vs4, vs1
+; TRACKLIVE-NEXT: xvmuldp v0, vs4, v2
+; TRACKLIVE-NEXT: xvmaddadp v8, vs6, v8
+; TRACKLIVE-NEXT: xvmaddadp v7, vs5, v2
+; TRACKLIVE-NEXT: xvmaddadp v6, vs5, v6
+; TRACKLIVE-NEXT: xxlor vs4, v2, v2
+; TRACKLIVE-NEXT: lxvdsx v4, r6, r4
+; TRACKLIVE-NEXT: li r6, 0
+; TRACKLIVE-NEXT: xvmaddadp v1, v2, v2
+; TRACKLIVE-NEXT: xvmaddadp v0, v2, v2
+; TRACKLIVE-NEXT: xxlor vs18, v8, v8
+; TRACKLIVE-NEXT: vmr v8, v2
+; TRACKLIVE-NEXT: xxlor vs7, v7, v7
+; TRACKLIVE-NEXT: xxlor vs16, v8, v8
+; TRACKLIVE-NEXT: xvmuldp v3, vs5, v4
+; TRACKLIVE-NEXT: xvmuldp v5, vs0, v4
+; TRACKLIVE-NEXT: xvmuldp v9, vs6, v4
+; TRACKLIVE-NEXT: xvmuldp v11, v4, v2
+; TRACKLIVE-NEXT: vmr v4, v2
+; TRACKLIVE-NEXT: xxlor vs6, v6, v6
+; TRACKLIVE-NEXT: xxlor vs12, v4, v4
+; TRACKLIVE-NEXT: xxlor v4, vs3, vs3
+; TRACKLIVE-NEXT: xxlor vs10, v0, v0
+; TRACKLIVE-NEXT: xxlor vs11, v1, v1
+; TRACKLIVE-NEXT: xxlor vs14, v4, v4
+; TRACKLIVE-NEXT: xxlor vs5, v3, v3
+; TRACKLIVE-NEXT: xxlor vs9, v11, v11
+; TRACKLIVE-NEXT: xxlor vs13, v5, v5
+; TRACKLIVE-NEXT: xxlor vs15, v5, v5
+; TRACKLIVE-NEXT: xxlor vs19, v9, v9
+; TRACKLIVE-NEXT: xxlor vs17, v9, v9
+; TRACKLIVE-NEXT: xxmtacc acc1
+; TRACKLIVE-NEXT: xxmtacc acc2
+; TRACKLIVE-NEXT: xxmtacc acc3
+; TRACKLIVE-NEXT: xxmtacc acc4
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
+; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT: xxmfacc acc1
+; TRACKLIVE-NEXT: xxmfacc acc2
+; TRACKLIVE-NEXT: xxmfacc acc3
+; TRACKLIVE-NEXT: xxmfacc acc4
+; TRACKLIVE-NEXT: stxv vs5, 0(r3)
+; TRACKLIVE-NEXT: stxv vs13, 32(r3)
+; TRACKLIVE-NEXT: stxv vs8, 16(0)
+; TRACKLIVE-NEXT: stxv vs16, 48(0)
+; TRACKLIVE-NEXT: b .LBB0_1
+bb: ; entry block: computes loop-invariant values, then enters the bb9 loop, which has no exit (the function contains no ret)
+ %i = load i32, i32* %arg, align 4
+ %i3 = sext i32 %i to i64
+ %i4 = shl nsw i64 %i3, 3 ; %i4 = sext(*%arg) * 8
+ %i5 = bitcast [0 x %0]* %arg1 to i8*
+ %i6 = getelementptr i8, i8* %i5, i64 undef ; undef offset - presumably left over from test-case reduction
+ %i7 = getelementptr [0 x %1], [0 x %1]* %arg2, i64 0, i64 -8 ; %arg2 minus 8 elements, i.e. -64 bytes
+ %i8 = getelementptr i8, i8* %i6, i64 undef
+ br label %bb9
+
+bb9: ; preds = %bb95, %bb
+ %i10 = phi i64 [ 1, %bb ], [ 0, %bb95 ] ; outer loop header: 1 on entry, 0 on every later iteration
+ %i11 = getelementptr %1, %1* null, i64 2 ; null + 2 elements = absolute address 16
+ %i12 = bitcast %1* %i11 to <2 x double>*
+ %i13 = load <2 x double>, <2 x double>* %i12, align 1
+ %i14 = add nuw nsw i64 %i10, 2
+ %i15 = getelementptr inbounds %1, %1* %i7, i64 undef
+ %i16 = bitcast %1* %i15 to <2 x double>*
+ %i17 = load <2 x double>, <2 x double>* %i16, align 1
+ %i18 = load <2 x double>, <2 x double>* null, align 1 ; load from address 0
+ %i19 = getelementptr %1, %1* %i15, i64 6 ; %i15 + 6 elements = +48 bytes
+ %i20 = bitcast %1* %i19 to <2 x double>*
+ %i21 = load <2 x double>, <2 x double>* %i20, align 1
+ %i22 = load i64, i64* undef, align 8 ; load from an undef address
+ %i23 = insertelement <2 x i64> poison, i64 %i22, i32 0
+ %i24 = bitcast <2 x i64> %i23 to <2 x double>
+ %i25 = shufflevector <2 x double> %i24, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero mask: splat lane 0 into both lanes
+ %i26 = mul i64 %i14, %i4 ; byte offset = (%i10 + 2) * %i4
+ %i27 = getelementptr i8, i8* null, i64 %i26
+ %i28 = getelementptr inbounds i8, i8* %i27, i64 0
+ %i29 = getelementptr i8, i8* %i28, i64 16
+ %i30 = bitcast i8* %i29 to i64*
+ %i31 = load i64, i64* %i30, align 8
+ %i32 = insertelement <2 x i64> poison, i64 %i31, i32 0
+ %i33 = bitcast <2 x i64> %i32 to <2 x double>
+ %i34 = shufflevector <2 x double> %i33, <2 x double> undef, <2 x i32> zeroinitializer ; splat lane 0, as for %i25
+ %i35 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %i25, <2 x double> zeroinitializer)
+ %i36 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i13, <2 x double> %i25, <2 x double> zeroinitializer)
+ %i37 = fmul contract <2 x double> %i13, zeroinitializer
+ %i38 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> %i35)
+ %i39 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i36)
+ %i40 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+ %i41 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i37)
+ %i42 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i18, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+ %i43 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i21, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+ %i44 = fmul contract <2 x double> %i17, %i34
+ %i45 = fmul contract <2 x double> zeroinitializer, %i34
+ %i46 = fmul contract <2 x double> %i18, %i34
+ %i47 = fmul contract <2 x double> %i21, %i34
+ %i48 = bitcast <2 x double> %i44 to <16 x i8>
+ %i49 = bitcast <2 x double> %i40 to <16 x i8>
+ %i50 = bitcast <2 x double> %i38 to <16 x i8>
+ %i51 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i48, <16 x i8> %i49, <16 x i8> %i50) ; accumulator 1 of 4, built from four 16-byte vectors
+ %i52 = bitcast <2 x double> %i45 to <16 x i8>
+ %i53 = bitcast <2 x double> %i41 to <16 x i8>
+ %i54 = bitcast <2 x double> %i39 to <16 x i8>
+ %i55 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i52, <16 x i8> %i53, <16 x i8> %i54) ; accumulator 2 of 4
+ %i56 = bitcast <2 x double> %i46 to <16 x i8>
+ %i57 = bitcast <2 x double> %i42 to <16 x i8>
+ %i58 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i56, <16 x i8> %i57, <16 x i8> %i56) ; accumulator 3 of 4 (%i56 is passed twice)
+ %i59 = bitcast <2 x double> %i47 to <16 x i8>
+ %i60 = bitcast <2 x double> %i43 to <16 x i8>
+ %i61 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i59, <16 x i8> %i60, <16 x i8> %i59) ; accumulator 4 of 4 (%i59 is passed twice)
+ %i62 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i51, <256 x i1> undef, <16 x i8> undef) ; %i62-%i81: five rounds of xvf64gerpp, each round updating all four accumulators; the non-accumulator operands are undef
+ %i63 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i55, <256 x i1> undef, <16 x i8> undef)
+ %i64 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i58, <256 x i1> undef, <16 x i8> undef)
+ %i65 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i61, <256 x i1> undef, <16 x i8> undef)
+ %i66 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i62, <256 x i1> undef, <16 x i8> undef)
+ %i67 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i63, <256 x i1> undef, <16 x i8> undef)
+ %i68 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i64, <256 x i1> undef, <16 x i8> undef)
+ %i69 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i65, <256 x i1> undef, <16 x i8> undef)
+ %i70 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i66, <256 x i1> undef, <16 x i8> undef)
+ %i71 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i67, <256 x i1> undef, <16 x i8> undef)
+ %i72 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i68, <256 x i1> undef, <16 x i8> undef)
+ %i73 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i69, <256 x i1> undef, <16 x i8> undef)
+ %i74 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i70, <256 x i1> undef, <16 x i8> undef)
+ %i75 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i71, <256 x i1> undef, <16 x i8> undef)
+ %i76 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i72, <256 x i1> undef, <16 x i8> undef)
+ %i77 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i73, <256 x i1> undef, <16 x i8> undef)
+ %i78 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i74, <256 x i1> undef, <16 x i8> undef)
+ %i79 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i75, <256 x i1> undef, <16 x i8> undef)
+ %i80 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i76, <256 x i1> undef, <16 x i8> undef)
+ %i81 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i77, <256 x i1> undef, <16 x i8> undef)
+ br label %bb82
+
+bb82: ; preds = %bb82, %bb9
+ %i83 = phi <512 x i1> [ %i94, %bb82 ], [ %i81, %bb9 ] ; inner loop: all four accumulators are loop-carried, so four are live at the same time
+ %i84 = phi <512 x i1> [ %i93, %bb82 ], [ %i80, %bb9 ]
+ %i85 = phi <512 x i1> [ %i92, %bb82 ], [ %i79, %bb9 ]
+ %i86 = phi <512 x i1> [ %i91, %bb82 ], [ %i78, %bb9 ]
+ %i87 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i86, <256 x i1> undef, <16 x i8> undef) ; %i87-%i94: two more xvf64gerpp rounds per inner iteration
+ %i88 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i85, <256 x i1> undef, <16 x i8> undef)
+ %i89 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i84, <256 x i1> undef, <16 x i8> undef)
+ %i90 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i83, <256 x i1> undef, <16 x i8> undef)
+ %i91 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i87, <256 x i1> undef, <16 x i8> undef)
+ %i92 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i88, <256 x i1> undef, <16 x i8> undef)
+ %i93 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i89, <256 x i1> undef, <16 x i8> undef)
+ %i94 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i90, <256 x i1> undef, <16 x i8> undef)
+ br i1 undef, label %bb95, label %bb82 ; inner-loop exit condition is undef
+
+bb95: ; preds = %bb82
+ %i96 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i91) ; split each accumulator into four 16-byte vectors; one element of each result is stored below
+ %i97 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i96, 2
+ %i98 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i92)
+ %i99 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i98, 3
+ %i100 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i93)
+ %i101 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i100, 2
+ %i102 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i94)
+ %i103 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i102, 3
+ %i104 = getelementptr inbounds i8, i8* %i8, i64 undef ; store base: %i8 plus an undef offset
+ %i105 = bitcast i8* %i104 to <16 x i8>*
+ store <16 x i8> %i97, <16 x i8>* %i105, align 1
+ %i106 = getelementptr i8, i8* %i104, i64 32
+ %i107 = bitcast i8* %i106 to <16 x i8>*
+ store <16 x i8> %i101, <16 x i8>* %i107, align 1
+ %i108 = getelementptr i8, i8* null, i64 16 ; absolute address 16
+ %i109 = bitcast i8* %i108 to <16 x i8>*
+ store <16 x i8> %i99, <16 x i8>* %i109, align 1
+ %i110 = getelementptr i8, i8* null, i64 48 ; absolute address 48
+ %i111 = bitcast i8* %i110 to <16 x i8>*
+ store <16 x i8> %i103, <16 x i8>* %i111, align 1
+ br label %bb9 ; unconditional back edge to the outer loop header
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) ; fused multiply-add on <2 x double>
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) ; four <16 x i8> vectors -> one <512 x i1> accumulator value
+declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) ; takes an accumulator plus a <256 x i1> and a <16 x i8> operand, returns an accumulator
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) ; one accumulator -> struct of four <16 x i8> vectors
+
More information about the llvm-commits
mailing list