[PATCH] D91927: [X86] Add x86_amx type for intel AMX.
LuoYuanke via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 22 22:04:44 PST 2020
LuoYuanke added inline comments.
================
Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:265
+ // If the dst type is <256 x i32>*, it is a valid instruction.
+ // %0 = bitcast x86_amx* %tile to <256 x i32>*
+ // %1 = load <256 x i32>, <256 x i32>* %0, align 64
----------------
pengfei wrote:
> Where does `x86_amx* %tile` come from? Shouldn't it have been transferred to `x86_amx` before this bitcast, if it exists?
In my test case, the `x86_amx*` bitcast is introduced by the "Combine redundant instructions" pass. The IR is transformed from:
```
*** IR Dump After Simplify the CFG ***
define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 {
entry:
%row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0
%0 = load i16, i16* %row, align 64, !tbaa !2
%col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1
%1 = load i16, i16* %col, align 2, !tbaa !7
%2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6
%3 = bitcast x86_amx %2 to <256 x i32>
%tile = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3
store <256 x i32> %3, <256 x i32>* %tile, align 64, !tbaa !8
ret void
}
```
To
```
*** IR Dump After Combine redundant instructions ***
; Function Attrs: alwaysinline nounwind uwtable mustprogress
define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 {
entry:
%row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0
%0 = load i16, i16* %row, align 64, !tbaa !2
%col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1
%1 = load i16, i16* %col, align 2, !tbaa !7
%2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6
%tile = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3
%3 = bitcast <256 x i32>* %tile to x86_amx*
store x86_amx %2, x86_amx* %3, align 64, !tbaa !8
ret void
}
```
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D91927/new/
https://reviews.llvm.org/D91927
More information about the llvm-commits
mailing list