[llvm] 4c921aa - [X86] Name instructions in test (NFC)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 22 07:53:27 PDT 2022
Author: Nikita Popov
Date: 2022-06-22T16:53:15+02:00
New Revision: 4c921aa3f554b23e7ddd501f3f6fded0ffa32703
URL: https://github.com/llvm/llvm-project/commit/4c921aa3f554b23e7ddd501f3f6fded0ffa32703
DIFF: https://github.com/llvm/llvm-project/commit/4c921aa3f554b23e7ddd501f3f6fded0ffa32703.diff
LOG: [X86] Name instructions in test (NFC)
Run the test through -instnamer to make it easier to modify.
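For context, a renaming like this is normally produced by running the IR through opt's instruction namer pass; the exact invocation below is an assumption (including the output file name), not taken from the commit:

    opt -S -passes=instnamer llvm/test/CodeGen/X86/AMX/amx-gemm.ll -o amx-gemm.named.ll

instnamer only assigns names to values that currently lack them, which is why the previously anonymous %0, %1, %2, ... become %i, %i1, %i2, ... in the diff below, while existing names such as %add.ptr and the basic block labels are left untouched.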
Added:
Modified:
llvm/test/CodeGen/X86/AMX/amx-gemm.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/AMX/amx-gemm.ll b/llvm/test/CodeGen/X86/AMX/amx-gemm.ll
index 8f5d0c738318..ad374394290a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-gemm.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-gemm.ll
@@ -25,7 +25,6 @@
; CHECK: ldtilecfg
-; Function Attrs: noinline nounwind uwtable
define dso_local void @inner_product(i32* %A_mem, i32* %B_mem, i32* %C_mem, i32 %M, i32 %N, i32 %K) local_unnamed_addr {
entry:
%mul = shl i32 %K, 4
@@ -46,20 +45,20 @@ for.cond3.preheader.preheader: ; preds = %entry
%wide.trip.count207 = zext i32 %div209 to i64
%wide.trip.count203 = zext i32 %div1 to i64
%wide.trip.count = zext i32 %div2 to i64
- %0 = add nsw i64 %wide.trip.count, -1
+ %i = add nsw i64 %wide.trip.count, -1
%xtraiter = and i64 %wide.trip.count, 7
- %1 = icmp ult i64 %0, 7
+ %i1 = icmp ult i64 %i, 7
%unroll_iter = and i64 %wide.trip.count, 4294967288
%lcmp.mod.not = icmp eq i64 %xtraiter, 0
br label %for.cond3.preheader
-for.cond3.preheader: ; preds = %for.cond3.preheader.preheader, %for.cond.cleanup5
+for.cond3.preheader: ; preds = %for.cond.cleanup5, %for.cond3.preheader.preheader
%indvars.iv205 = phi i64 [ 0, %for.cond3.preheader.preheader ], [ %indvars.iv.next206, %for.cond.cleanup5 ]
- %2 = trunc i64 %indvars.iv205 to i32
- %mul11 = mul i32 %mul, %2
+ %i2 = trunc i64 %indvars.iv205 to i32
+ %mul11 = mul i32 %mul, %i2
%idx.ext = sext i32 %mul11 to i64
%add.ptr = getelementptr inbounds i32, i32* %A_mem, i64 %idx.ext
- %mul26 = mul i32 %mul25, %2
+ %mul26 = mul i32 %mul25, %i2
%idx.ext27 = sext i32 %mul26 to i64
%add.ptr28 = getelementptr inbounds i32, i32* %C_mem, i64 %idx.ext27
br i1 %cmp4173, label %for.body6, label %for.cond.cleanup5
@@ -72,132 +71,132 @@ for.cond.cleanup5: ; preds = %for.cond.cleanup9,
%exitcond208.not = icmp eq i64 %indvars.iv.next206, %wide.trip.count207
br i1 %exitcond208.not, label %for.cond.cleanup, label %for.cond3.preheader
-for.body6: ; preds = %for.cond3.preheader, %for.cond.cleanup9
+for.body6: ; preds = %for.cond.cleanup9, %for.cond3.preheader
%indvars.iv199 = phi i64 [ %indvars.iv.next200, %for.cond.cleanup9 ], [ 0, %for.cond3.preheader ]
- %3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 16, i16 64)
- %4 = shl nsw i64 %indvars.iv199, 4
+ %i3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 16, i16 64)
+ %i4 = shl nsw i64 %indvars.iv199, 4
br i1 %cmp8163, label %for.body10.preheader, label %for.cond.cleanup9
for.body10.preheader: ; preds = %for.body6
- %add.ptr19 = getelementptr inbounds i32, i32* %B_mem, i64 %4
- br i1 %1, label %for.cond.cleanup9.loopexit.unr-lcssa, label %for.body10
+ %add.ptr19 = getelementptr inbounds i32, i32* %B_mem, i64 %i4
+ br i1 %i1, label %for.cond.cleanup9.loopexit.unr-lcssa, label %for.body10
for.cond.cleanup9.loopexit.unr-lcssa: ; preds = %for.body10, %for.body10.preheader
- %.lcssa.ph = phi x86_amx [ undef, %for.body10.preheader ], [ %68, %for.body10 ]
+ %.lcssa.ph = phi x86_amx [ undef, %for.body10.preheader ], [ %i68, %for.body10 ]
%indvars.iv.unr = phi i64 [ 0, %for.body10.preheader ], [ %indvars.iv.next.7, %for.body10 ]
- %c.sroa.8127.2.in164.unr = phi x86_amx [ %3, %for.body10.preheader ], [ %68, %for.body10 ]
+ %c.sroa.8127.2.in164.unr = phi x86_amx [ %i3, %for.body10.preheader ], [ %i68, %for.body10 ]
br i1 %lcmp.mod.not, label %for.cond.cleanup9, label %for.body10.epil
-for.body10.epil: ; preds = %for.cond.cleanup9.loopexit.unr-lcssa, %for.body10.epil
+for.body10.epil: ; preds = %for.body10.epil, %for.cond.cleanup9.loopexit.unr-lcssa
%indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body10.epil ], [ %indvars.iv.unr, %for.cond.cleanup9.loopexit.unr-lcssa ]
- %c.sroa.8127.2.in164.epil = phi x86_amx [ %11, %for.body10.epil ], [ %c.sroa.8127.2.in164.unr, %for.cond.cleanup9.loopexit.unr-lcssa ]
+ %c.sroa.8127.2.in164.epil = phi x86_amx [ %i11, %for.body10.epil ], [ %c.sroa.8127.2.in164.unr, %for.cond.cleanup9.loopexit.unr-lcssa ]
%epil.iter = phi i64 [ %epil.iter.sub, %for.body10.epil ], [ %xtraiter, %for.cond.cleanup9.loopexit.unr-lcssa ]
- %5 = shl nsw i64 %indvars.iv.epil, 4
- %add.ptr14.epil = getelementptr inbounds i32, i32* %add.ptr, i64 %5
- %6 = bitcast i32* %add.ptr14.epil to i8*
- %7 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %6, i64 %mul15)
- %8 = mul nsw i64 %5, %conv23
- %add.ptr22.epil = getelementptr inbounds i32, i32* %add.ptr19, i64 %8
- %9 = bitcast i32* %add.ptr22.epil to i8*
- %10 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %9, i64 %mul24)
- %11 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %c.sroa.8127.2.in164.epil, x86_amx %7, x86_amx %10)
+ %i5 = shl nsw i64 %indvars.iv.epil, 4
+ %add.ptr14.epil = getelementptr inbounds i32, i32* %add.ptr, i64 %i5
+ %i6 = bitcast i32* %add.ptr14.epil to i8*
+ %i7 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %i6, i64 %mul15)
+ %i8 = mul nsw i64 %i5, %conv23
+ %add.ptr22.epil = getelementptr inbounds i32, i32* %add.ptr19, i64 %i8
+ %i9 = bitcast i32* %add.ptr22.epil to i8*
+ %i10 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %i9, i64 %mul24)
+ %i11 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %c.sroa.8127.2.in164.epil, x86_amx %i7, x86_amx %i10)
%indvars.iv.next.epil = add nuw nsw i64 %indvars.iv.epil, 1
%epil.iter.sub = add i64 %epil.iter, -1
%epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0
br i1 %epil.iter.cmp.not, label %for.cond.cleanup9, label %for.body10.epil
-for.cond.cleanup9: ; preds = %for.cond.cleanup9.loopexit.unr-lcssa, %for.body10.epil, %for.body6
- %c.sroa.8127.2.in.lcssa = phi x86_amx [ %3, %for.body6 ], [ %.lcssa.ph, %for.cond.cleanup9.loopexit.unr-lcssa ], [ %11, %for.body10.epil ]
- %add.ptr31 = getelementptr inbounds i32, i32* %add.ptr28, i64 %4
- %12 = bitcast i32* %add.ptr31 to i8*
- tail call void @llvm.x86.tilestored64.internal(i16 16, i16 64, i8* %12, i64 %mul24, x86_amx %c.sroa.8127.2.in.lcssa)
+for.cond.cleanup9: ; preds = %for.body10.epil, %for.cond.cleanup9.loopexit.unr-lcssa, %for.body6
+ %c.sroa.8127.2.in.lcssa = phi x86_amx [ %i3, %for.body6 ], [ %.lcssa.ph, %for.cond.cleanup9.loopexit.unr-lcssa ], [ %i11, %for.body10.epil ]
+ %add.ptr31 = getelementptr inbounds i32, i32* %add.ptr28, i64 %i4
+ %i12 = bitcast i32* %add.ptr31 to i8*
+ tail call void @llvm.x86.tilestored64.internal(i16 16, i16 64, i8* %i12, i64 %mul24, x86_amx %c.sroa.8127.2.in.lcssa)
%indvars.iv.next200 = add nuw nsw i64 %indvars.iv199, 1
%exitcond204.not = icmp eq i64 %indvars.iv.next200, %wide.trip.count203
br i1 %exitcond204.not, label %for.cond.cleanup5, label %for.body6
-for.body10: ; preds = %for.body10.preheader, %for.body10
+for.body10: ; preds = %for.body10, %for.body10.preheader
%indvars.iv = phi i64 [ %indvars.iv.next.7, %for.body10 ], [ 0, %for.body10.preheader ]
- %c.sroa.8127.2.in164 = phi x86_amx [ %68, %for.body10 ], [ %3, %for.body10.preheader ]
+ %c.sroa.8127.2.in164 = phi x86_amx [ %i68, %for.body10 ], [ %i3, %for.body10.preheader ]
%niter = phi i64 [ %niter.nsub.7, %for.body10 ], [ %unroll_iter, %for.body10.preheader ]
- %13 = shl nsw i64 %indvars.iv, 4
- %add.ptr14 = getelementptr inbounds i32, i32* %add.ptr, i64 %13
- %14 = bitcast i32* %add.ptr14 to i8*
- %15 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %14, i64 %mul15)
- %16 = mul nsw i64 %13, %conv23
- %add.ptr22 = getelementptr inbounds i32, i32* %add.ptr19, i64 %16
- %17 = bitcast i32* %add.ptr22 to i8*
- %18 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %17, i64 %mul24)
- %19 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %c.sroa.8127.2.in164, x86_amx %15, x86_amx %18)
+ %i13 = shl nsw i64 %indvars.iv, 4
+ %add.ptr14 = getelementptr inbounds i32, i32* %add.ptr, i64 %i13
+ %i14 = bitcast i32* %add.ptr14 to i8*
+ %i15 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %i14, i64 %mul15)
+ %i16 = mul nsw i64 %i13, %conv23
+ %add.ptr22 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i16
+ %i17 = bitcast i32* %add.ptr22 to i8*
+ %i18 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* %i17, i64 %mul24)
+ %i19 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %c.sroa.8127.2.in164, x86_amx %i15, x86_amx %i18)
%indvars.iv.next = shl i64 %indvars.iv, 4
- %20 = or i64 %indvars.iv.next, 16
- %add.ptr14.1 = getelementptr inbounds i32, i32* %add.ptr, i64 %20
- %21 = bitcast i32* %add.ptr14.1 to i8*
- %22 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %21, i64 %mul15)
- %23 = mul nsw i64 %20, %conv23
- %add.ptr22.1 = getelementptr inbounds i32, i32* %add.ptr19, i64 %23
- %24 = bitcast i32* %add.ptr22.1 to i8*
- %25 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %24, i64 %mul24)
- %26 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %19, x86_amx %22, x86_amx %25)
+ %i20 = or i64 %indvars.iv.next, 16
+ %add.ptr14.1 = getelementptr inbounds i32, i32* %add.ptr, i64 %i20
+ %i21 = bitcast i32* %add.ptr14.1 to i8*
+ %i22 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i21, i64 %mul15)
+ %i23 = mul nsw i64 %i20, %conv23
+ %add.ptr22.1 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i23
+ %i24 = bitcast i32* %add.ptr22.1 to i8*
+ %i25 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i24, i64 %mul24)
+ %i26 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i19, x86_amx %i22, x86_amx %i25)
%indvars.iv.next.1 = shl i64 %indvars.iv, 4
- %27 = or i64 %indvars.iv.next.1, 32
- %add.ptr14.2 = getelementptr inbounds i32, i32* %add.ptr, i64 %27
- %28 = bitcast i32* %add.ptr14.2 to i8*
- %29 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %28, i64 %mul15)
- %30 = mul nsw i64 %27, %conv23
- %add.ptr22.2 = getelementptr inbounds i32, i32* %add.ptr19, i64 %30
- %31 = bitcast i32* %add.ptr22.2 to i8*
- %32 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %31, i64 %mul24)
- %33 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %26, x86_amx %29, x86_amx %32)
+ %i27 = or i64 %indvars.iv.next.1, 32
+ %add.ptr14.2 = getelementptr inbounds i32, i32* %add.ptr, i64 %i27
+ %i28 = bitcast i32* %add.ptr14.2 to i8*
+ %i29 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i28, i64 %mul15)
+ %i30 = mul nsw i64 %i27, %conv23
+ %add.ptr22.2 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i30
+ %i31 = bitcast i32* %add.ptr22.2 to i8*
+ %i32 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i31, i64 %mul24)
+ %i33 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i26, x86_amx %i29, x86_amx %i32)
%indvars.iv.next.2 = shl i64 %indvars.iv, 4
- %34 = or i64 %indvars.iv.next.2, 48
- %add.ptr14.3 = getelementptr inbounds i32, i32* %add.ptr, i64 %34
- %35 = bitcast i32* %add.ptr14.3 to i8*
- %36 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %35, i64 %mul15)
- %37 = mul nsw i64 %34, %conv23
- %add.ptr22.3 = getelementptr inbounds i32, i32* %add.ptr19, i64 %37
- %38 = bitcast i32* %add.ptr22.3 to i8*
- %39 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %38, i64 %mul24)
- %40 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %33, x86_amx %36, x86_amx %39)
+ %i34 = or i64 %indvars.iv.next.2, 48
+ %add.ptr14.3 = getelementptr inbounds i32, i32* %add.ptr, i64 %i34
+ %i35 = bitcast i32* %add.ptr14.3 to i8*
+ %i36 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i35, i64 %mul15)
+ %i37 = mul nsw i64 %i34, %conv23
+ %add.ptr22.3 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i37
+ %i38 = bitcast i32* %add.ptr22.3 to i8*
+ %i39 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i38, i64 %mul24)
+ %i40 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i33, x86_amx %i36, x86_amx %i39)
%indvars.iv.next.3 = shl i64 %indvars.iv, 4
- %41 = or i64 %indvars.iv.next.3, 64
- %add.ptr14.4 = getelementptr inbounds i32, i32* %add.ptr, i64 %41
- %42 = bitcast i32* %add.ptr14.4 to i8*
- %43 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %42, i64 %mul15)
- %44 = mul nsw i64 %41, %conv23
- %add.ptr22.4 = getelementptr inbounds i32, i32* %add.ptr19, i64 %44
- %45 = bitcast i32* %add.ptr22.4 to i8*
- %46 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %45, i64 %mul24)
- %47 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %40, x86_amx %43, x86_amx %46)
+ %i41 = or i64 %indvars.iv.next.3, 64
+ %add.ptr14.4 = getelementptr inbounds i32, i32* %add.ptr, i64 %i41
+ %i42 = bitcast i32* %add.ptr14.4 to i8*
+ %i43 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i42, i64 %mul15)
+ %i44 = mul nsw i64 %i41, %conv23
+ %add.ptr22.4 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i44
+ %i45 = bitcast i32* %add.ptr22.4 to i8*
+ %i46 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i45, i64 %mul24)
+ %i47 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i40, x86_amx %i43, x86_amx %i46)
%indvars.iv.next.4 = shl i64 %indvars.iv, 4
- %48 = or i64 %indvars.iv.next.4, 80
- %add.ptr14.5 = getelementptr inbounds i32, i32* %add.ptr, i64 %48
- %49 = bitcast i32* %add.ptr14.5 to i8*
- %50 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %49, i64 %mul15)
- %51 = mul nsw i64 %48, %conv23
- %add.ptr22.5 = getelementptr inbounds i32, i32* %add.ptr19, i64 %51
- %52 = bitcast i32* %add.ptr22.5 to i8*
- %53 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %52, i64 %mul24)
- %54 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %47, x86_amx %50, x86_amx %53)
+ %i48 = or i64 %indvars.iv.next.4, 80
+ %add.ptr14.5 = getelementptr inbounds i32, i32* %add.ptr, i64 %i48
+ %i49 = bitcast i32* %add.ptr14.5 to i8*
+ %i50 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i49, i64 %mul15)
+ %i51 = mul nsw i64 %i48, %conv23
+ %add.ptr22.5 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i51
+ %i52 = bitcast i32* %add.ptr22.5 to i8*
+ %i53 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i52, i64 %mul24)
+ %i54 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i47, x86_amx %i50, x86_amx %i53)
%indvars.iv.next.5 = shl i64 %indvars.iv, 4
- %55 = or i64 %indvars.iv.next.5, 96
- %add.ptr14.6 = getelementptr inbounds i32, i32* %add.ptr, i64 %55
- %56 = bitcast i32* %add.ptr14.6 to i8*
- %57 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %56, i64 %mul15)
- %58 = mul nsw i64 %55, %conv23
- %add.ptr22.6 = getelementptr inbounds i32, i32* %add.ptr19, i64 %58
- %59 = bitcast i32* %add.ptr22.6 to i8*
- %60 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %59, i64 %mul24)
- %61 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %54, x86_amx %57, x86_amx %60)
+ %i55 = or i64 %indvars.iv.next.5, 96
+ %add.ptr14.6 = getelementptr inbounds i32, i32* %add.ptr, i64 %i55
+ %i56 = bitcast i32* %add.ptr14.6 to i8*
+ %i57 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i56, i64 %mul15)
+ %i58 = mul nsw i64 %i55, %conv23
+ %add.ptr22.6 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i58
+ %i59 = bitcast i32* %add.ptr22.6 to i8*
+ %i60 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i59, i64 %mul24)
+ %i61 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i54, x86_amx %i57, x86_amx %i60)
%indvars.iv.next.6 = shl i64 %indvars.iv, 4
- %62 = or i64 %indvars.iv.next.6, 112
- %add.ptr14.7 = getelementptr inbounds i32, i32* %add.ptr, i64 %62
- %63 = bitcast i32* %add.ptr14.7 to i8*
- %64 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %63, i64 %mul15)
- %65 = mul nsw i64 %62, %conv23
- %add.ptr22.7 = getelementptr inbounds i32, i32* %add.ptr19, i64 %65
- %66 = bitcast i32* %add.ptr22.7 to i8*
- %67 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %66, i64 %mul24)
- %68 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %61, x86_amx %64, x86_amx %67)
+ %i62 = or i64 %indvars.iv.next.6, 112
+ %add.ptr14.7 = getelementptr inbounds i32, i32* %add.ptr, i64 %i62
+ %i63 = bitcast i32* %add.ptr14.7 to i8*
+ %i64 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i63, i64 %mul15)
+ %i65 = mul nsw i64 %i62, %conv23
+ %add.ptr22.7 = getelementptr inbounds i32, i32* %add.ptr19, i64 %i65
+ %i66 = bitcast i32* %add.ptr22.7 to i8*
+ %i67 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 64, i8* nonnull %i66, i64 %mul24)
+ %i68 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 16, i16 64, i16 64, x86_amx %i61, x86_amx %i64, x86_amx %i67)
%indvars.iv.next.7 = add nuw nsw i64 %indvars.iv, 8
%niter.nsub.7 = add i64 %niter, -8
%niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0