[clang] [llvm] [X86][AMX] Support AMX-TRANSPOSE (PR #113532)

Feng Zou via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 31 08:35:32 PDT 2024


================
@@ -919,23 +1017,66 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
   return true;
 }
 
+static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
+                                       bool IsRow) {
+  if (!isAMXIntrinsic(Inst))
+    return nullptr;
+
+  auto *II = cast<IntrinsicInst>(Inst);
+  if (IsRow)
+    return II->getOperand(0);
+
+  assert(ShapeIdx < 2 && "Currently 2 shapes in 1 instruction at most!");
+  return II->getOperand(ShapeIdx + 1);
+}
+
 // %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %42)
 // store <256 x i32> %43, <256 x i32>* %p, align 64
 // -->
 // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p,
 //                                           i64 64, x86_amx %42)
 bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
   Value *Tile = Cast->getOperand(0);
-  // TODO: If it is cast intrinsic or phi node, we can propagate the
-  // shape information through def-use chain.
-  if (!isAMXIntrinsic(Tile))
+
+  assert(Tile->getType()->isX86_AMXTy() && "Not Tile Operand!");
+
+  // TODO: Specially handle the mult-use case.
----------------
fzou1 wrote:

Typo: mult -> multi. The same applies to the other occurrences below.

https://github.com/llvm/llvm-project/pull/113532
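
For context, the new helper reads the shape operands directly off the defining AMX intrinsic: operand 0 is always the row, and the column for shape ShapeIdx lives at operand ShapeIdx + 1 (at most two shapes per instruction, per the assert). Below is a minimal caller sketch; the wrapper name and default ShapeIdx are hypothetical and not part of this patch, it only assumes the helper shown in the hunk above:

  // Hypothetical convenience wrapper (not in the patch): recover the
  // (row, col) shape operands of the AMX intrinsic that defines `Tile`,
  // e.g. before rewriting a cast+store pair into tilestored64.internal.
  static std::pair<Value *, Value *> getTileShape(Value *Tile,
                                                  unsigned ShapeIdx = 0) {
    // getShapeFromAMXIntrinsic returns nullptr when `Tile` is not
    // produced by an AMX intrinsic, so bail out in that case.
    Value *Row = getShapeFromAMXIntrinsic(Tile, ShapeIdx, /*IsRow=*/true);
    Value *Col = getShapeFromAMXIntrinsic(Tile, ShapeIdx, /*IsRow=*/false);
    if (!Row || !Col)
      return {nullptr, nullptr};
    return {Row, Col};
  }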

