[PATCH] D137923: [X86][AMX] Fix the shape dependency issue.
Xiang Zhang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 00:54:00 PST 2022
xiangzhangllvm added a comment.
How about merging the "load + cast" pair at the position of the cast, rather than at the position of the load?
For example, generate the tileload that replaces the load on line 95 and the cast on line 105 at line 105:
89 *** IR Dump After Lower AMX intrinsics (lower-amx-intrinsics) ***
90 define void @test_tile_dpbssd(ptr byval(%struct.__tile1024i_str) align 64 %a, ptr byval(%struct.__tile1024i_str) align 64 %b, ptr byval(%struct.__tile1024i_str) align 64 %c) {
91 entry:
92 %b.row.ptr = getelementptr inbounds i8, ptr %b, i64 2
93 %b.row = load i16, ptr %b.row.ptr, align 2
94 %b.tile.ptr = getelementptr inbounds i8, ptr %b, i64 64
95 %b.tile = load <256 x i32>, ptr %b.tile.ptr, align 64
96 %a.row = load i16, ptr %a, align 64
97 %a.col.ptr = getelementptr inbounds i8, ptr %a, i64 2
98 %a.col = load i16, ptr %a.col.ptr, align 2
99 %a.tile.ptr = getelementptr inbounds i8, ptr %a, i64 64
100 %a.tile = load <256 x i32>, ptr %a.tile.ptr, align 64
101 %c.tile.ptr = getelementptr inbounds %struct.__tile1024i_str, ptr %c, i64 0, i32 3
102 %c.tile = load <256 x i32>, ptr %c.tile.ptr, align 64
103 %c.amx = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %c.tile)
104 %a.amx = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %a.tile)
105 %b.amx = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %b.tile)
106 %res = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %a.row, i16 %b.row, i16 %a.col, x86_amx %c.amx, x86_amx %a.amx, x86_amx %b.amx)
107 ret void
108 }
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D137923/new/
https://reviews.llvm.org/D137923
More information about the llvm-commits
mailing list