[llvm] ff5301d - [X86] Regenerate test checks (NFC)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 22 08:02:03 PDT 2022
Author: Nikita Popov
Date: 2022-06-22T17:00:10+02:00
New Revision: ff5301dde9815beee7c4410b3a01d137df636364
URL: https://github.com/llvm/llvm-project/commit/ff5301dde9815beee7c4410b3a01d137df636364
DIFF: https://github.com/llvm/llvm-project/commit/ff5301dde9815beee7c4410b3a01d137df636364.diff
LOG: [X86] Regenerate test checks (NFC)
This runs the test through -instnamer and generates test checks
using update_test_checks.py. (The previous comment indicated that
update_llc_test_checks.py was used, but I rather doubt that.)
This relies on the non-determinism fix from
fbb72530fe80a95678a7d643d7a3f5ee8d693c93;
the previous check lines had apparently been written to accommodate
that non-determinism.
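
For reference, a regeneration of this kind can be reproduced with roughly
the following commands (a sketch only; the exact invocation and the
build/bin/opt path are assumptions, not taken from the commit):

  # Assumed workflow: give the anonymous IR values stable names first,
  # then regenerate the CHECK lines against the named test.
  build/bin/opt -passes=instnamer -S \
      llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll -o /tmp/amx-precfg.ll
  mv /tmp/amx-precfg.ll llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll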
Added:
Modified:
llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
index 8bd5f94c36a0..1e25b896bee6 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
@@ -1,199 +1,200 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -pre-amx-config -S | FileCheck %s
@buf = dso_local global [1024 x i8] zeroinitializer, align 16
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
-; Function Attrs: nounwind uwtable
define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) local_unnamed_addr {
-; CHECK-LABEL: entry:
-; CHECK: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <16 x i32>, align 4
-; CHECK-NEXT: %{{[0-9]+}} = alloca <256 x i32>, align 1024
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <256 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: %{{[0-9]+}} = alloca <256 x i32>, align 1024
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <256 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: %{{[0-9]+}} = alloca <256 x i32>, align 1024
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <256 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: %{{[0-9]+}} = alloca <256 x i32>, align 1024
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <256 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: %tobool.not = icmp eq i32 %cond, 0
-; CHECK-NEXT: br i1 %tobool.not, label %if.else, label %if.then
-; CHECK: if.then:
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: br label %if.end
-; CHECK: if.else:
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 8, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: br label %if.end
-; CHECK: if.end: ; preds = %if.else, %if.then
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: %amx.tmm.1.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 49
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 18
-; CHECK-NEXT: %amx.tmm.1.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.1.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 8, i16* %amx.tmm.1.shape.col{{.*}}, align 2
-; CHECK-NEXT: %amx.tmm.2.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 50
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 20
-; CHECK-NEXT: %amx.tmm.2.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 8 to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.2.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.2.shape.col{{.*}}, align 2
-; CHECK-NEXT: %amx.tmm.3.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 51
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 22
-; CHECK-NEXT: %amx.tmm.3.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.3.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.3.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* %{{[0-9]+}}, i64 64)
-; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* %{{[0-9]+}}, i64 64)
-; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64)
-; CHECK-NEXT: %{{[0-9]+}} = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %{{[0-9]+}}, x86_amx %{{[0-9]+}}, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = bitcast <16 x i32>* %{{[0-9]+}} to i8*
-; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* %{{[0-9]+}}, align 4
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 0
-; CHECK-NEXT: store i8 1, i8* %{{[0-9]+}}, align 1
-; CHECK-NEXT: %amx.tmm.0.shape.row{{.*}} = getelementptr i8, i8* %{{[0-9]+}}, i64 48
-; CHECK-NEXT: %{{[0-9]+}} = getelementptr i8, i8* %{{[0-9]+}}, i64 16
-; CHECK-NEXT: %amx.tmm.0.shape.col{{.*}} = bitcast i8* %{{[0-9]+}} to i16*
-; CHECK-NEXT: %{{[0-9]+}} = trunc i16 %row to i8
-; CHECK-NEXT: store i8 %{{[0-9]+}}, i8* %amx.tmm.0.shape.row{{.*}}, align 1
-; CHECK-NEXT: store i16 %col, i16* %amx.tmm.0.shape.col{{.*}}, align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* %{{[0-9]+}})
-; CHECK-NEXT: %{{[0-9]+}} = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %{{[0-9]+}}, i64 64)
-; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %{{[0-9]+}})
-; CHECK-NEXT: ret void
+; CHECK-LABEL: @test_api(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP3:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = alloca <16 x i32>, align 4
+; CHECK-NEXT: [[I:%.*]] = alloca <256 x i32>, align 1024
+; CHECK-NEXT: [[I1:%.*]] = bitcast <256 x i32>* [[I]] to i8*
+; CHECK-NEXT: [[I2:%.*]] = alloca <256 x i32>, align 1024
+; CHECK-NEXT: [[I3:%.*]] = bitcast <256 x i32>* [[I2]] to i8*
+; CHECK-NEXT: [[I4:%.*]] = alloca <256 x i32>, align 1024
+; CHECK-NEXT: [[I5:%.*]] = bitcast <256 x i32>* [[I4]] to i8*
+; CHECK-NEXT: [[I6:%.*]] = alloca <256 x i32>, align 1024
+; CHECK-NEXT: [[I7:%.*]] = bitcast <256 x i32>* [[I6]] to i8*
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[COND:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32>* [[TMP7]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[TMP8]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP9]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP8]], i64 48
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP8]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL:%.*]] = bitcast i8* [[TMP10]] to i16*
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i16 [[ROW:%.*]] to i8
+; CHECK-NEXT: store i8 [[TMP11]], i8* [[AMX_TMM_0_SHAPE_ROW]], align 1
+; CHECK-NEXT: store i16 8, i16* [[AMX_TMM_0_SHAPE_COL]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP8]])
+; CHECK-NEXT: [[I8:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 8, i8* [[I5]], i64 64, x86_amx [[I8]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32>* [[TMP6]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP12]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP13]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW1:%.*]] = getelementptr i8, i8* [[TMP12]], i64 48
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[TMP12]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL2:%.*]] = bitcast i8* [[TMP14]] to i16*
+; CHECK-NEXT: [[TMP15:%.*]] = trunc i16 8 to i8
+; CHECK-NEXT: store i8 [[TMP15]], i8* [[AMX_TMM_0_SHAPE_ROW1]], align 1
+; CHECK-NEXT: store i16 [[COL:%.*]], i16* [[AMX_TMM_0_SHAPE_COL2]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP12]])
+; CHECK-NEXT: [[I9:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64, x86_amx [[I9]])
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32>* [[TMP5]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP17]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW3:%.*]] = getelementptr i8, i8* [[TMP16]], i64 48
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP16]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL4:%.*]] = bitcast i8* [[TMP18]] to i16*
+; CHECK-NEXT: [[TMP19:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP19]], i8* [[AMX_TMM_0_SHAPE_ROW3]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL4]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP16]])
+; CHECK-NEXT: [[I10:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64, x86_amx [[I10]])
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i32>* [[TMP4]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP20]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP21]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW5:%.*]] = getelementptr i8, i8* [[TMP20]], i64 48
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP20]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL6:%.*]] = bitcast i8* [[TMP22]] to i16*
+; CHECK-NEXT: [[TMP23:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP23]], i8* [[AMX_TMM_0_SHAPE_ROW5]], align 1
+; CHECK-NEXT: store i16 8, i16* [[AMX_TMM_0_SHAPE_COL6]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP20]])
+; CHECK-NEXT: [[I11:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 8, i8* [[I5]], i64 64, x86_amx [[I11]])
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32>* [[TMP3]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP25]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW7:%.*]] = getelementptr i8, i8* [[TMP24]], i64 48
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, i8* [[TMP24]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL8:%.*]] = bitcast i8* [[TMP26]] to i16*
+; CHECK-NEXT: [[TMP27:%.*]] = trunc i16 8 to i8
+; CHECK-NEXT: store i8 [[TMP27]], i8* [[AMX_TMM_0_SHAPE_ROW7]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL8]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP24]])
+; CHECK-NEXT: [[I12:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64, x86_amx [[I12]])
+; CHECK-NEXT: [[TMP28:%.*]] = bitcast <16 x i32>* [[TMP2]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[TMP28]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP29]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW9:%.*]] = getelementptr i8, i8* [[TMP28]], i64 48
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[TMP28]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL10:%.*]] = bitcast i8* [[TMP30]] to i16*
+; CHECK-NEXT: [[TMP31:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP31]], i8* [[AMX_TMM_0_SHAPE_ROW9]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL10]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP28]])
+; CHECK-NEXT: [[I13:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64, x86_amx [[I13]])
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32>* [[TMP1]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, i8* [[TMP32]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP33]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW11:%.*]] = getelementptr i8, i8* [[TMP32]], i64 48
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, i8* [[TMP32]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL12:%.*]] = bitcast i8* [[TMP34]] to i16*
+; CHECK-NEXT: [[TMP35:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP35]], i8* [[AMX_TMM_0_SHAPE_ROW11]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL12]], align 2
+; CHECK-NEXT: [[AMX_TMM_1_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 49
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, i8* [[TMP32]], i64 18
+; CHECK-NEXT: [[AMX_TMM_1_SHAPE_COL:%.*]] = bitcast i8* [[TMP36]] to i16*
+; CHECK-NEXT: [[TMP37:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP37]], i8* [[AMX_TMM_1_SHAPE_ROW]], align 1
+; CHECK-NEXT: store i16 8, i16* [[AMX_TMM_1_SHAPE_COL]], align 2
+; CHECK-NEXT: [[AMX_TMM_2_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 50
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[TMP32]], i64 20
+; CHECK-NEXT: [[AMX_TMM_2_SHAPE_COL:%.*]] = bitcast i8* [[TMP38]] to i16*
+; CHECK-NEXT: [[TMP39:%.*]] = trunc i16 8 to i8
+; CHECK-NEXT: store i8 [[TMP39]], i8* [[AMX_TMM_2_SHAPE_ROW]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_2_SHAPE_COL]], align 2
+; CHECK-NEXT: [[AMX_TMM_3_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 51
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, i8* [[TMP32]], i64 22
+; CHECK-NEXT: [[AMX_TMM_3_SHAPE_COL:%.*]] = bitcast i8* [[TMP40]] to i16*
+; CHECK-NEXT: [[TMP41:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP41]], i8* [[AMX_TMM_3_SHAPE_ROW]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_3_SHAPE_COL]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP32]])
+; CHECK-NEXT: [[I14:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, i8* [[I5]], i64 64)
+; CHECK-NEXT: [[I15:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64)
+; CHECK-NEXT: [[I16:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64)
+; CHECK-NEXT: [[I17:%.*]] = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL]], i16 8, x86_amx [[I16]], x86_amx [[I14]], x86_amx [[I15]])
+; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I7]], i64 64, x86_amx [[I17]])
+; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32>* [[TMP0]] to i8*
+; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, i8* [[TMP42]], i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP43]], align 1
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW13:%.*]] = getelementptr i8, i8* [[TMP42]], i64 48
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, i8* [[TMP42]], i64 16
+; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL14:%.*]] = bitcast i8* [[TMP44]] to i16*
+; CHECK-NEXT: [[TMP45:%.*]] = trunc i16 [[ROW]] to i8
+; CHECK-NEXT: store i8 [[TMP45]], i8* [[AMX_TMM_0_SHAPE_ROW13]], align 1
+; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL14]], align 2
+; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP42]])
+; CHECK-NEXT: [[I18:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I7]], i64 64)
+; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx [[I18]])
+; CHECK-NEXT: ret void
+;
entry:
- %0 = alloca <256 x i32>, align 1024
- %1 = bitcast <256 x i32>* %0 to i8*
- %2 = alloca <256 x i32>, align 1024
- %3 = bitcast <256 x i32>* %2 to i8*
- %4 = alloca <256 x i32>, align 1024
- %5 = bitcast <256 x i32>* %4 to i8*
- %6 = alloca <256 x i32>, align 1024
- %7 = bitcast <256 x i32>* %6 to i8*
+ %i = alloca <256 x i32>, align 1024
+ %i1 = bitcast <256 x i32>* %i to i8*
+ %i2 = alloca <256 x i32>, align 1024
+ %i3 = bitcast <256 x i32>* %i2 to i8*
+ %i4 = alloca <256 x i32>, align 1024
+ %i5 = bitcast <256 x i32>* %i4 to i8*
+ %i6 = alloca <256 x i32>, align 1024
+ %i7 = bitcast <256 x i32>* %i6 to i8*
%tobool.not = icmp eq i32 %cond, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then: ; preds = %entry
- %8 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %5, i64 64, x86_amx %8)
- %9 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %3, i64 64, x86_amx %9)
- %10 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %1, i64 64, x86_amx %10)
+ %i8 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %i5, i64 64, x86_amx %i8)
+ %i9 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %i3, i64 64, x86_amx %i9)
+ %i10 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %i1, i64 64, x86_amx %i10)
br label %if.end
if.else: ; preds = %entry
- %11 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %5, i64 64, x86_amx %11)
- %12 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %3, i64 64, x86_amx %12)
- %13 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %1, i64 64, x86_amx %13)
+ %i11 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, i8* %i5, i64 64, x86_amx %i11)
+ %i12 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %i3, i64 64, x86_amx %i12)
+ %i13 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
+ call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %i1, i64 64, x86_amx %i13)
br label %if.end
if.end: ; preds = %if.else, %if.then
- %14 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* %5, i64 64)
- %15 = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* %3, i64 64)
- %16 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %1, i64 64)
- %17 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %16, x86_amx %14, x86_amx %15)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %7, i64 64, x86_amx %17)
- %18 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %7, i64 64)
- tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %18)
+ %i14 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* %i5, i64 64)
+ %i15 = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* %i3, i64 64)
+ %i16 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %i1, i64 64)
+ %i17 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %i16, x86_amx %i14, x86_amx %i15)
+ call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %i7, i64 64, x86_amx %i17)
+ %i18 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, i8* %i7, i64 64)
+ tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %i18)
ret void
}