[llvm] 08ddbab - [X86][AMX] Fix missing stride register for tileloadd (#110226)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 22:02:03 PDT 2024


Author: Phoebe Wang
Date: 2024-10-15T13:02:00+08:00
New Revision: 08ddbab866cb76619f0f4952dc11bab2a9ee1147

URL: https://github.com/llvm/llvm-project/commit/08ddbab866cb76619f0f4952dc11bab2a9ee1147
DIFF: https://github.com/llvm/llvm-project/commit/08ddbab866cb76619f0f4952dc11bab2a9ee1147.diff

LOG: [X86][AMX] Fix missing stride register for tileloadd (#110226)

Fixes: #110190

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86LowerTileCopy.cpp
    llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
    llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index 613722b398f446..9cf700d6b65b05 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -140,14 +140,16 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr *NewMI =
           addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
               .addReg(SrcReg, getKillRegState(SrcMO.isKill()));
-      MachineOperand &MO = NewMI->getOperand(2);
-      MO.setReg(GR64Cand ? GR64Cand : X86::RAX);
-      MO.setIsKill(true);
+      MachineOperand *MO = &NewMI->getOperand(X86::AddrIndexReg);
+      MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
       // tileloadd (%sp, %idx), %tmm
       Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
 #undef GET_EGPR_IF_ENABLED
       NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
                                 TileSS);
+      MO = &NewMI->getOperand(1 + X86::AddrIndexReg);
+      MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
+      MO->setIsKill(true);
       if (!GR64Cand) {
         // restore %rax
         // mov (%sp) %rax

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
index e9bb68e711f889..503b39919b5502 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
@@ -45,7 +45,7 @@ define dso_local void @test1(ptr%buf) nounwind {
 ; CHECK-NEXT:    tileloadd (%rbx,%r15), %tmm0
 ; CHECK-NEXT:    tileloadd (%rbx,%r15), %tmm1
 ; CHECK-NEXT:    tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
-; CHECK-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
+; CHECK-NEXT:    tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
 ; CHECK-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2
 ; CHECK-NEXT:    tilestored %tmm2, (%rbx,%r15)
 ; CHECK-NEXT:    incl %r14d
@@ -109,8 +109,8 @@ define dso_local void @test1(ptr%buf) nounwind {
 ; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
 ; EGPR-NEXT:    tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
 ; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
-; EGPR-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
-; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT:    tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
+; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x04,0x00,0x04,0x00,0x00]
 ; EGPR-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
 ; EGPR-NEXT:    tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
 ; EGPR-NEXT:    incl %r14d # encoding: [0x41,0xff,0xc6]

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
index ec087f23248c4c..6ef7219cfebdb5 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -148,7 +148,7 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
 ; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    movabsq $64, %rax
 ; CHECK-NEXT:    tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill
-; CHECK-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
+; CHECK-NEXT:    tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
 ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; CHECK-NEXT:    jmp .LBB1_4
   %4 = shl i32 %2, 4
@@ -212,7 +212,7 @@ define void @multi_use() nounwind {
 ; CHECK-NEXT:    tilezero %tmm0
 ; CHECK-NEXT:    movabsq $64, %rbp
 ; CHECK-NEXT:    tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
-; CHECK-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
+; CHECK-NEXT:    tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload
 ; CHECK-NEXT:    tdpbf16ps %tmm0, %tmm0, %tmm1
 ; CHECK-NEXT:    tdpbf16ps %tmm0, %tmm0, %tmm0
 ; CHECK-NEXT:    addq $2928, %rsp # imm = 0xB70


        


More information about the llvm-commits mailing list