[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

LuoYuanke via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 22 22:04:44 PST 2020


LuoYuanke added inline comments.


================
Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:265
+        // If the dst type is <256 x i32>*, it is valid intruction.
+        // %0 = bitcast x86_amx* %tile to <256 x i32>*
+        // %1 = load <256 x i32>, <256 x i32>* %0, align 64
----------------
pengfei wrote:
> Where's `x86_amx* %tile` from? Shouldn't it have been transferred to `x86_amx` before this bitcast, if it exists?
In my test case, the `x86_amx*` pointer is introduced by the "Combine redundant instructions" pass, which transforms the IR from:
```
*** IR Dump After Simplify the CFG ***
define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 {
entry:
  %row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0
  %0 = load i16, i16* %row, align 64, !tbaa !2
  %col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1
  %1 = load i16, i16* %col, align 2, !tbaa !7
  %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6
  %3 = bitcast x86_amx %2 to <256 x i32>
  %tile = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3
  store <256 x i32> %3, <256 x i32>* %tile, align 64, !tbaa !8
  ret void
}
```

To

```
*** IR Dump After Combine redundant instructions ***
; Function Attrs: alwaysinline nounwind uwtable mustprogress
define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 {
entry:
  %row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0
  %0 = load i16, i16* %row, align 64, !tbaa !2
  %col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1
  %1 = load i16, i16* %col, align 2, !tbaa !7
  %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6
  %tile = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3
  %3 = bitcast <256 x i32>* %tile to x86_amx*
  store x86_amx %2, x86_amx* %3, align 64, !tbaa !8
  ret void
}
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927



More information about the llvm-commits mailing list