[llvm] 2e44b07 - [X86] Do not directly fold for VINSERTPS (#65718)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 8 00:35:48 PDT 2023


Author: Phoebe Wang
Date: 2023-09-08T15:35:44+08:00
New Revision: 2e44b07e240b7176a9d08ac84367778f10f7b061

URL: https://github.com/llvm/llvm-project/commit/2e44b07e240b7176a9d08ac84367778f10f7b061
DIFF: https://github.com/llvm/llvm-project/commit/2e44b07e240b7176a9d08ac84367778f10f7b061.diff

LOG: [X86] Do not directly fold for VINSERTPS (#65718)

We have already customized folding for VINSERTPS in 7e6606f4f1, which does
the folding when the alignment is >= 4 bytes.

We cannot arbitrarily fold it like other instructions because we need to
calculate the source offset.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/avx.ll
    llvm/test/TableGen/x86-fold-tables.inc
    llvm/utils/TableGen/X86ManualFoldTables.def

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/avx.ll b/llvm/test/CodeGen/X86/avx.ll
index dc59186d568cc2c..6a10d855ea95c0f 100644
--- a/llvm/test/CodeGen/X86/avx.ll
+++ b/llvm/test/CodeGen/X86/avx.ll
@@ -184,12 +184,14 @@ define <4 x float> @nofold_insertps(ptr %a, <4 x float> %b) {
 ; X86-LABEL: nofold_insertps:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vinsertps $176, (%eax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
+; X86-NEXT:    vmovups (%eax), %xmm1
+; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: nofold_insertps:
 ; X64:       ## %bb.0:
-; X64-NEXT:    vinsertps $176, (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2]
+; X64-NEXT:    vmovups (%rdi), %xmm1
+; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
 ; X64-NEXT:    retq
   %1 = load <4 x float>, ptr %a, align 1
   %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>

diff  --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 6168fdfd323c3b5..0e6b8b04f3ca758 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -2105,8 +2105,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   {X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0},
   {X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0},
   {X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0},
-  {X86::VINSERTPSZrr, X86::VINSERTPSZrm, TB_NO_REVERSE},
-  {X86::VINSERTPSrr, X86::VINSERTPSrm, TB_NO_REVERSE},
   {X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0},
   {X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0},
   {X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0},

diff  --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def
index d949830b0988304..8e6cb4a7bd87983 100644
--- a/llvm/utils/TableGen/X86ManualFoldTables.def
+++ b/llvm/utils/TableGen/X86ManualFoldTables.def
@@ -225,6 +225,8 @@ NOFOLD(MMX_MOVQ64rr_REV)
 // =>
 // insertpsrm xmm1, m32, imm
 NOFOLD(INSERTPSrr)
+NOFOLD(VINSERTPSZrr)
+NOFOLD(VINSERTPSrr)
 #undef NOFOLD
 
 #ifndef ENTRY


        


More information about the llvm-commits mailing list